library(gprofiler2)
library(viridis)
library(stringr)
library(signatureSearch)
library(ggplot2)
library(dplyr)
library(here)
library(viridis)
Enrichment function for up and down genes
enrich <- function(upgenes, downgenes, topreturned) {#, filepath) {
up_fea <- gost(upgenes, multi_query = FALSE, correction_method = "bonferroni", evcodes = TRUE)
#remove abitrary pathways -- any pathways with > 1000 genes
up_fea <- up_fea$result %>% dplyr::filter(., term_size < 1000 & term_size > 5) %>% dplyr::mutate(., .keep = "all", direction = "upregulated")
up_mat <- up_fea %>% dplyr::arrange(., desc(intersection_size)) %>% dplyr::slice_head(., n = topreturned)
down_fea <- gost(downgenes, multi_query = FALSE, correction_method = "bonferroni", evcodes = TRUE)
#remove abitrary pathways -- any pathways with > 1000 genes
down_fea <- down_fea$result %>% dplyr::filter(., term_size < 1000 & term_size > 5) %>% dplyr::mutate(., .keep = "all", direction = "downregulated")
down_mat <- down_fea %>% dplyr::arrange(., desc(intersection_size)) %>% dplyr::slice_head(., n = topreturned)
combined_fea <- rbind(up_mat, down_mat)
fea_res <- list(downregulated = down_fea, upregulated = up_fea, topcompared = combined_fea)
return(fea_res)
}
MOUSE GENES Enrichment function for up and down genes
#Runs an ordered query
fea <- function(genelist, topreturned){
fea <- gost(genelist, organism = "mmusculus", ordered_query = FALSE, multi_query = FALSE, evcodes = TRUE, correction_method = "bonferroni", sources = c("GO:BP", "GO:MF", "GO:CC", "KEGG", "REAC", "MIRNA", "CORUM", "HP", "HPA", "WP"))
#remove abitrary pathways -- any pathways with > 1000 genes
fea <- fea$result %>% dplyr::filter(., term_size < 1000 & term_size > 10)
return(fea)
}
mouseenrich <- function(upgenes, downgenes, topreturned) {
upgenes$LFC <- sort(as.numeric(upgenes$LFC), decreasing = TRUE)
up_fea <- fea(upgenes$Mouse.gene.stable.ID, topreturned$Mouse.gene.stable.ID)
up_fea <- dplyr::mutate(up_fea, .keep = "all", direction = "upregulated")
downgenes$LFC <- sort(as.numeric(downgenes$LFC))
down_fea <- fea(downgenes$Mouse.gene.stable.ID, topreturned$Mouse.gene.stable.ID)
down_fea <- dplyr::mutate(down_fea, .keep = "all", direction = "downregulated")
up_mat <- up_fea %>% dplyr::arrange(., desc(recall)) %>% dplyr::slice_head(., n = topreturned)
down_mat <- down_fea %>% dplyr::arrange(., desc(recall)) %>% dplyr::slice_head(., n = topreturned)
combined_fea <- rbind(up_mat, down_mat)
fea_res <- list(downregulated = down_fea, upregulated = up_fea, topcompared = combined_fea)
return(fea_res)
}
Custom background for FEA (for enrichment using mouse to human to entrez to lincs conversion – background gene list is all possible measured genes that have gone through those conversions)
#Needs to only be possible genes -- all genes measured in salmon gene counts that map to human entrez
#mesgenes <- data@NAMES
#remove version numbers
#mesgenes <- gsub("\\..*", "", mesgenes)
#genebg <- convertMouseGeneList(mesgenes)
#write.csv2(genebg, file = "./data/background_genelist_genestoLINCSgenes.csv")
genebg <- read.csv(file = here("data", "background_genelist_genestoLINCSgenes.csv"))
bubbleplot, up and down for x axis and terms on y axis
bubbleplot <- function(combined_fea){
bubbleplot <- ggplot(combined_fea, aes(x=direction, y=term_name, size = intersection_size, fill = p_value)) +
geom_point(alpha=0.7, shape = 21) +
scale_size(range = c(2, 10), name = "# Genes Matched to Term") +
scale_fill_distiller(palette = "Purples") +
labs(x = "Differential Expression Direction", y = "Functional Enrichment Terms") +
theme_minimal(base_size = 8) #+ labs(title = "Top Enriched Terms")
# ggsave(filepath, bubbleplot, width = 10, height = 7)
return(bubbleplot)
}
barplot, up and down merged, colored by p-value, terms on y axis and # genes on x axis
barplot <- function(combined_fea){
barplot <- ggplot(combined_fea, aes(x=intersection_size, y=term_name, fill = recall)) +
geom_bar( stat = "identity") +
scale_fill_continuous(type = "viridis") +
labs(x = "Number of Genes Matched to Term", y = "Functional Terms") +
theme_minimal(base_size = 8)
return(barplot)
}
subsetting terms that are mitochondria dysfunction-related
subsetting terms that are ciliopathy-related terms
Subsetting for fibrosis-realted terms
immune terms Subsetting for fibrosis-realted terms
subsetting terms that are SKF-96365 related (a store-operated Ca2+ entry (SOCE) inhibitor)
### Load in (top 100 human) data
Load in top 100 up and down genes used as signatures for signatureSearch
’39 precystic p70
hom_degs_UP <- read.csv(here("res", "deseq2_outputs", "GSE149739_degs_UP_220421.csv"))
hom_degs_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE149739_degs_DOWN_220421.csv"))
’19 cystic p28
hom_19_UP <- read.csv(here("res", "deseq2_outputs", "GSE134719_degs_UP_220421.csv"))
hom_19_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE134719_degs_DOWN_220421.csv"))
’56 cystic p21
hom_56_UP <- read.csv(here("res", "deseq2_outputs", "GSE69556_degs_UP_220421.csv"))
hom_56_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE69556_degs_DOWN_220421.csv"))
Comparing results across datasets
“Hom” refers to homosapiens – genes that have been mapped from mouse to human ### Compare number of DEGs - datasets
fea_nf_GSE149739 <- enrich(as.character(hom_degs_UP$entrezgene_id), as.character(hom_degs_DOWN$entrezgene_id), 30)
Barplot of overall top enrichment terms
bp_39 <- barplot(fea_nf_GSE149739$topcompared)
bp_39
Bubbleplot of cilia-related enriched terms
Bubbleplot of mitochondria-related enriched terms
SOCE-related pathways for ’19 and ’56 overlap
fea_GSE69556 <- enrich(as.character(hom_56_UP$entrezgene_id), as.character(hom_56_DOWN$entrezgene_id), 30)
Barplot of overall top enrichment terms
bp_56 <- barplot(fea_GSE69556$topcompared)
bp_56
Bubbleplot of cilia-related enriched terms
Bubbleplot of mitochondria-related enriched terms
SOCE-related pathways for ’19 and ’56 overlap
fea_GSE134719 <- enrich(as.character(hom_19_UP$entrezgene_id), as.character(hom_19_DOWN$entrezgene_id), 30)
Barplot of overall top enrichment terms
bar19 <- barplot(fea_GSE134719$topcompared)
bar19
Bubbleplot of cilia-related enriched terms
Bubbleplot of mito-related enriched terms
SOCE-related pathways for ’19 and ’56 overlap
Enrichment comparisons
Save
Using ALL mouse DEGs (0.05 alpha and -2 and 2 LFC cutoff) before mapping to human genes or selecting top 100 available in LINCS
’39 nf-core aligned
musdegs_39_UP <- read.csv(here("res", "deseq2_outputs", "GSE149739_mus_degs_UP_220421.csv"))
musdegs_39_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE149739_mus_degs_DOWN_220421.csv"))
’19
musdegs_19_UP <- read.csv(here("res", "deseq2_outputs", "GSE134719_mus_degs_UP_220421.csv"))
musdegs_19_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE134719_mus_degs_DOWN_220421.csv"))
’56
musdegs_56_UP <- read.csv(here("res", "deseq2_outputs", "GSE69556_mus_degs_UP_220421.csv"))
musdegs_56_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE69556_mus_degs_DOWN_220421.csv"))
degs_19_56_overlap_up <- merge(musdegs_19_UP, musdegs_56_UP, by = "Mouse.gene.stable.ID")
degs_19_56_overlap_down <- merge(musdegs_19_DOWN, musdegs_56_DOWN, by = "Mouse.gene.stable.ID")
degs_19_56_39_overlap_up <- merge(degs_19_56_overlap_up, musdegs_39_UP, by = "Mouse.gene.stable.ID")
degs_19_56_39_overlap_down <- merge(degs_19_56_overlap_down, musdegs_39_DOWN, by = "Mouse.gene.stable.ID")
degs_39_56_overlap_up <- merge(musdegs_39_UP, musdegs_56_UP, by = "Mouse.gene.stable.ID")
degs_39_56_overlap_down <- merge(musdegs_39_DOWN, musdegs_56_DOWN, by = "Mouse.gene.stable.ID")
degs_39_19_overlap_up <- merge(musdegs_39_UP, musdegs_19_UP, by = "Mouse.gene.stable.ID")
degs_39_19_overlap_down <- merge(musdegs_39_DOWN, musdegs_19_DOWN, by = "Mouse.gene.stable.ID")
fea_mouse_39 <- mouseenrich(musdegs_39_UP, musdegs_39_DOWN, 30)
bp_mouse_39_overall <- barplot(dplyr::filter(fea_mouse_39$topcompared, intersection_size > 10)) + ggtitle("Pre-Cystic Top Upregulated ")
bp_mouse_39_overall
fea_mouse_39$topcompared$term_name <- stringr::str_wrap(fea_mouse_39$topcompared$term_name, width = 60)
p70_fea_plot <- ggplot(dplyr::filter(fea_mouse_39$topcompared, intersection_size > 10), aes(x=intersection_size, y=term_name, fill = recall)) +
geom_bar( stat = "identity") +
scale_fill_continuous(type = "viridis") +
labs(x = "Number of Genes Matched to Term", y = "Functional Terms", title = "Pre-Cystic Top Upregulated Enrichment") +
theme(text = element_text(size = 14), plot.title = element_text(size = 24))# +
#theme_minimal(base_size = 8)
p70_fea_plot
bubbleplot
bubbleplot_p70 <- bubbleplot(dplyr::filter(fea_mouse_39$topcompared, intersection_size > 6)) + labs(title = "Top Enriched Terms for Pre-Cystic P70 Dataset") + theme(text = element_text(size = 14), plot.title = element_text(size = 24))
bubbleplot(fea_mouse_39$topcompared) + labs(title = "Top Enriched Terms for Pre-Cystic P70 Dataset") + theme(text = element_text(size = 14), plot.title = element_text(size = 24))
ggsave(p70_fea_plot, filename = here("res", "fea", "p70_fea_plot.pdf"))
## Saving 7 x 5 in image
ggsave(bubbleplot_p70, filename = here("res", "fea", "p70_fea_bubbleplot.pdf"), height = 7, width = 14)
bubbleplot top
bubbleplot(dplyr::filter(fea_mouse_39$topcompared, intersection_size > 10)) #+ labs(title = "")
Cilia-related pathways
Mitochondria-related pathways
Store-operated CA 2+ Entry pathways
Fibrosis-related terms
Immune-related terms
Save plots
ggsave(filename = here("res", "fea", "mouse_39_overall.pdf"), bp_mouse_39_overall)
## Saving 7 x 5 in image
fea_mouse_56 <- mouseenrich(musdegs_56_UP, musdegs_56_DOWN, 30)
bp_mouse_56_overall <- barplot(dplyr::filter(fea_mouse_56$topcompared, intersection_size > 10))
bp_mouse_56_overall
bubbleplot
fea_mouse_56$topcompared$term_name <- stringr::str_wrap(fea_mouse_56$topcompared$term_name, width = 70)
bubbleplot_p21 <- bubbleplot(dplyr::filter(fea_mouse_56$topcompared, intersection_size > 10)) + labs(title = "Top Enriched Terms for Cystic P21 Dataset") + theme(text = element_text(size = 14), plot.title = element_text(size = 24))
ggsave(bubbleplot_p21, filename = here("res", "fea", "p21_fea_bubbleplot.pdf"), height = 7, width = 14)
Cilia-related pathways
Mitochondria-related pathways
Fibrosis-related terms
Immune-related terms
Store-operated CA 2+ Entry pathways
Save plots
ggsave(filename = here("res", "fea", "mouse_56_overall.pdf"), bp_mouse_56_overall)
## Saving 7 x 5 in image
fea_mouse_19 <- mouseenrich(musdegs_19_UP, musdegs_19_DOWN, 40)
bp_mouse_19 <- barplot(fea_mouse_19$topcompared)
bp_mouse_19
bubbleplot
fea_mouse_19$topcompared$term_name <- stringr::str_wrap(fea_mouse_19$topcompared$term_name, width = 70)
bubbleplot_p28 <- bubbleplot(dplyr::filter(fea_mouse_19$topcompared, intersection_size > 10)) + labs(title = "Top Enriched Terms for Cystic P28 Dataset") + theme(text = element_text(size = 14), plot.title = element_text(size = 24))
ggsave(bubbleplot_p28, filename = here("res", "fea", "p28_fea_bubbleplot.pdf"), height = 7, width = 14)
Cilia-related pathways
Mitochondria-related pathways
Fibrosis-related terms
Immune-related terms
Store-operated CA 2+ Entry pathways
Save gprofiler results
Save plots
ggsave(filename = here("res", "fea", "mouse_19_overall.pdf"), bp_mouse_19)
## Saving 7 x 5 in image
read in
fea_56_up <- read.csv(here("res", "fea", "fea_mouse_56_upterms.csv"))
fea_56_down <- read.csv(here("res", "fea", "fea_mouse_56_downterms.csv"))
fea_19_up <- read.csv(here("res", "fea", "fea_mouse_19_upterms.csv"))
fea_19_down <- read.csv(here("res", "fea", "fea_mouse_19_downterms.csv"))
fea_39_up <- read.csv(here("res", "fea", "fea_mouse_39_upterms.csv"))
fea_39_down <- read.csv(here("res", "fea", "fea_mouse_39_downterms.csv"))
merge for overlap
fea_cystic_overlap_up <- merge(fea_56_up, fea_19_up, by = "term_name")
fea_cystic_overlap_down <- merge(fea_56_down, fea_19_down, by = "term_name")
merge for overlap across all datasets
fea_all_overlap_up <- merge(fea_cystic_overlap_up, fea_39_up, by = "term_name")
fea_all_overlap_down <- merge(fea_cystic_overlap_down, fea_39_down, by = "term_name")
cystic up overlap
ggplot(slice_max(fea_cystic_overlap_up, order_by = recall.y, n = 30), aes(x=intersection_size.y, y=term_name, fill = p_value.y)) +
geom_bar( stat = "identity") +
scale_fill_continuous(type = "viridis") +
labs(x = "Number of Genes Matched to Term", y = "Functional Terms", title = "Top Enriched Terms") +
theme_minimal(base_size = 8)
cystic down overlap
ggplot(slice_max(fea_cystic_overlap_down, order_by = recall.y, n = 30), aes(x=intersection_size.y, y=term_name, fill = p_value.y)) +
geom_bar( stat = "identity") +
scale_fill_continuous(type = "viridis") +
labs(x = "Number of Genes Matched to Term", y = "Functional Terms", title = "Top Enriched Terms") +
theme_minimal(base_size = 8)
fea_cystic_overlap_combined <- rbind(fea_cystic_overlap_up, fea_cystic_overlap_down)
fea_cystic_overlap_combined <- fea_cystic_overlap_combined %>% rowwise() %>% mutate(total_genes = sum(intersection_size.x, intersection_size.y))
ggplot(slice_max(fea_cystic_overlap_combined, order_by = recall.x, n = 10), aes(x=direction.x, y=term_name, size = total_genes, fill = p_value.y)) +
geom_point(alpha=0.7, shape = 21) +
scale_size(range = c(2, 10), name = "# Genes Matched to Term") +
scale_fill_distiller(palette = "Purples") +
labs(x = "Differential Expression Direction", y = "Functional Enrichment Terms") +
theme_minimal(base_size = 8)
write.csv(fea_cystic_overlap_up, file = here("res", "fea", "fea_cystic_overlap_up.csv"))
write.csv(fea_cystic_overlap_down, file = here("res", "fea", "fea_cystic_overlap_down.csv"))
write.csv(fea_cystic_overlap_combined, file = here("res", "fea", "fea_cystic_overlap_combined.csv"))
Term overlap
fea_overlap_19_56_up <- merge(fea_mouse_19$upregulated, fea_mouse_56$upregulated, by = "term_name")
fea_overlap_19_56_down <- merge(fea_mouse_19$downregulated, fea_mouse_56$downregulated, by = "term_name")
#fea_overlap_19_56 <- list(upregulated = fea_overlap_19_56_up, downregulated = fea_overlap_19_56_down)
fea_overap_all_up <- merge(fea_mouse_39$upregulated, fea_overlap_19_56_up, by = "term_name")
fea_overap_all_down <- merge(fea_mouse_39$downregulated, fea_overlap_19_56_down, by = "term_name")
Save
fea_overlap_19_56_upterms <- apply(fea_overlap_19_56_up, 2, as.character)
write.csv(fea_overlap_19_56_upterms, file = here("res", "fea", "fea_overlap_19_56_upterms.csv"))
fea_overlap_19_56_downterms <- apply(fea_overlap_19_56_down, 2, as.character)
write.csv(fea_overlap_19_56_downterms, file = here("res", "fea", "fea_overlap_19_56_downterms.csv"))
fea_overap_all_upterms <- apply(fea_overap_all_up, 2, as.character)
write.csv(fea_overap_all_upterms, file = here("res", "fea", "fea_overap_all_upterms.csv"))
fea_overap_all_downterms <- apply(fea_overap_all_down, 2, as.character)
write.csv(fea_overap_all_downterms, file = here("res", "fea", "fea_overap_all_downterms.csv"))
Mitochondria-related terms overlap
R.Version()
## $platform
## [1] "x86_64-apple-darwin17.0"
##
## $arch
## [1] "x86_64"
##
## $os
## [1] "darwin17.0"
##
## $system
## [1] "x86_64, darwin17.0"
##
## $status
## [1] ""
##
## $major
## [1] "4"
##
## $minor
## [1] "2.0"
##
## $year
## [1] "2022"
##
## $month
## [1] "04"
##
## $day
## [1] "22"
##
## $`svn rev`
## [1] "82229"
##
## $language
## [1] "R"
##
## $version.string
## [1] "R version 4.2.0 (2022-04-22)"
##
## $nickname
## [1] "Vigorous Calisthenics"
installed.packages()[names(sessionInfo()$otherPkgs), "Version"]
## here dplyr ggplot2
## "1.0.1" "1.0.9" "3.3.6"
## signatureSearch SummarizedExperiment Biobase
## "1.10.0" "1.26.1" "2.56.0"
## GenomicRanges GenomeInfoDb IRanges
## "1.48.0" "1.32.2" "2.30.0"
## S4Vectors BiocGenerics MatrixGenerics
## "0.34.0" "0.42.0" "1.8.1"
## matrixStats Rcpp stringr
## "0.62.0" "1.0.9" "1.4.0"
## viridis viridisLite gprofiler2
## "0.6.2" "0.4.0" "0.2.1"